import pyblp
import numpy as np
import pandas as pd
#product data:
product_data = pd.read_csv(pyblp.data.BLP_PRODUCTS_LOCATION) # 2217*33
product_data.car_ids.head()
#new code to add on the domestic dummy
domestic = pd.read_csv('Data/BLP99/domestic.csv')
domestic.head()
product_data = product_data.merge(domestic, how='left', on='car_ids')
product_data.count()  
#note that domestic is last (after the instruments)
# agent data:
# this is how agent data obtained previously
agent_data_old = pd.read_csv(pyblp.data.BLP_AGENTS_LOCATION)
agent_data_old.head()
#output agent data to reverse engineer
agent_data_old.to_csv('Data/BLP99/agent_data.csv',index=False)
#here we follow the script JG sent.
# construct agent data from primitives
sds = pd.read_csv('Data/BLP99/sdincome.csv', header=None, names=['sd'], usecols=[0])
means = pd.read_csv('Data/BLP99/meanincome.csv', header=None, names=['year', 'mean'], usecols=[0, 1])
market_ids = []
weights = []
nodes = []
income = []
for index, (_, row) in enumerate(means.iterrows()):
    integration = pyblp.Integration('monte_carlo', 200,specification_options={'seed': 0})
    untransformed_agents = pyblp.build_integration(integration, 6)
    market_ids.append(np.repeat(1900 + int(row['year']), untransformed_agents.weights.size))
    weights.append(untransformed_agents.weights)
    nodes.append(untransformed_agents.nodes[:, :-1])
    income.append(np.exp(row['mean'] + float(sds['sd']) * untransformed_agents.nodes[:, -1]))
#agent_data_new = pd.DataFrame(data=agent_data)
#agent_data_new.head()
product_formulations = (
   pyblp.Formulation('1 + hpwt + air + mpd + space'),
   pyblp.Formulation('1 + prices + hpwt + air + mpd + space'),
   pyblp.Formulation('1 + log(hpwt) + air + log(mpg) + log(space) + trend')
)
product_formulations
agent_formulation = pyblp.Formulation('0 + I(1 / income)')
agent_formulation
problem = pyblp.Problem(product_formulations, product_data, agent_formulation,agent_data={
        'market_ids': np.concatenate(market_ids),
        'weights': np.concatenate(weights),
        'nodes': np.vstack(nodes),
        'income': np.concatenate(income),
    }, 
    costs_type='log')
problem
initial_sigma = np.diag([3.612, 0, 4.628, 1.818, 1.050, 2.056])
initial_sigma
initial_pi = np.c_[[0, -43.501, 0, 0, 0, 0]]
initial_pi
results = problem.solve(
    initial_sigma,
    initial_pi,
    costs_bounds=(0.001, None),
    W_type='clustered',
    se_type='clustered',
    initial_update=True,
)
results

